#!/bin/bash
# Starts Stage 1 training - trains only the cross-attention adapter
#
# Usage:
# 1. chmod +x scripts/03_train_stage1_adapter.sh
# 2. ./scripts/03_train_stage1_adapter.sh

# Abort on the first failing command, so the completion banner at the bottom
# is only printed when the trainer exits with status 0.
set -e

# Dataset directories handed to the trainer. All paths in this script are
# relative, so they resolve against the directory the script is launched from.
train_dirs=(
  data/processed/train-clean-100
  data/processed/train-clean-360
  data/processed/train-other-500
)
val_dirs=(
  data/processed/dev-clean
  data/processed/dev-other
)

# Complete option list for src.training.train_stage1_adapter, one option per
# line. Keeping it in an array avoids fragile backslash line continuations;
# the quoted "${...[@]}" expansion passes every element as its own argument.
trainer_args=(
  # --- data, checkpoint and output locations ---
  --train_data_dir "${train_dirs[@]}"
  --val_data_dir "${val_dirs[@]}"
  --pretrain_path pretrained_models/mdm_safetensors/mdm-170M-100e18-rsl-0.01.safetensors
  --out_dir out/stage1_adapter_960h

  # --- device count and batching ---
  --num_devices 4
  --batch_size 128
  --gradient_accumulation_steps 4

  # --- learning-rate settings ---
  --learning_rate 1e-4
  --lr_scaling linear
  --lr_max 3e-4
  --scheduler_type cosine_epoch
  --warmup_ratio 0.02

  # --- run length, early-stopping patience and regularisation ---
  --epochs 80
  --patience 8
  --weight_decay 0.01
  --clip_grad_norm 0.5

  # --- data loading ---
  --num_workers 16
)

echo "Starting Stage 1: Adapter Training..."

# Run the trainer as a module; `src` is looked up from the current directory.
python -m src.training.train_stage1_adapter "${trainer_args[@]}"

echo "✅ Stage 1 training script finished."